CU Amiga Super CD-ROM 6

home *** CD-ROM | disk | FTP | other *** search

/ CU Amiga Super CD-ROM 6 / CU Amiga Magazine's Super CD-ROM 06 (1996)(EMAP Images)(GB)(Track 1 of 4)[!][issue 1997-01].iso / cucd / prog / gnu-c / src / gcc-2.7.0-amiga / config / arm / lib1funcs.asm < prev next >

Wrap

Assembly Source File | 1995-06-15 | 30.0 KB | 1,599 lines

@ libgcc1 routines for ARM cpu.
@ Division and remainder, from Appendix E of the Sparc Version 8
@ Architecture Manual, with fixes from Gordon Irlam.
@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)

/* Copyright (C) 1995 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

/*
 * Input: dividend and divisor in r0 and r1 respectively.
 *
 * m4 parameters:
 *  NAME	name of function to generate
 *  OP		OP=div => r0 / r1; OP=mod => r0 % r1
 *  S		S=true => signed; S=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top `decade' of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

/*
 * The m4 template below is kept (commented out) as the reference from
 * which the four expanded routines further down were generated.  For
 * OP=mod with S=true the sign register is taken from the dividend alone,
 * because a remainder has the sign of the dividend; only a quotient has
 * the sign of dividend XOR divisor.
 */

/*
define(N, `4')dnl
define(WORDSIZE, `32')dnl
define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
dnl
define(dividend, `r0')dnl
define(divisor, `r1')dnl
define(Q, `r2')dnl
define(R, `r3')dnl
define(ITER, `ip')dnl
define(V, `lr')dnl
dnl
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
define(T, `r4')dnl
define(SC, `r5')dnl
ifelse(S, `true', `define(SIGN, `r6')')dnl
define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
define(ret, `ldmia	sp!, REGLIST pc}')dnl
dnl
dnl This is the recursive definition for developing quotient digits.
dnl
dnl Parameters:
dnl	$1	the current depth, 1 <= $1 <= N
dnl	$2	the current accumulation of quotient bits
dnl	N	max depth
dnl
dnl We add a new bit to $2 and either recurse or insert the bits in
dnl the quotient.  R, Q, and V are inputs and outputs as defined above;
dnl the condition codes are expected to reflect the input R, and are
dnl modified to reflect the output R.
dnl
define(DEVELOP_QUOTIENT_BITS,
`	@ depth $1, accumulated bits $2
	mov	V, V, lsr #1
	blt	L.$1.eval(2^N+$2+999)
	@ remainder is positive
	subs	R, R, V
	ifelse($1, N,
	`	ifelse(eval(2*$2+1<0), `0',
		`add	Q, Q, `#'eval($2*2+1)',
		`sub	Q, Q, `#'eval(-($2*2+1))')

		b	9f
	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
L.$1.eval(2^N+$2+999):
	@ remainder is negative
	adds	R, R, V
	ifelse($1, N,
	`	ifelse(eval(2*$2-1<0), `0',
		`add	Q, Q, `#'eval($2*2-1)',
		`sub	Q, Q, `#'eval(-($2*2-1))')
		b	9f

	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
	ifelse($1, 1, `9:')')dnl

#include "trap.h"

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl NAME
	.align 0
NAME:
	stmdb	sp!, REGLIST lr}
ifelse(S, `true',
`	@ compute sign of result; if neither is negative, no problem
	@ (quotient: sign of divisor XOR dividend; remainder: sign of dividend)
	ifelse(OP, `div', `eor	SIGN, divisor, dividend', `mov	SIGN, dividend')	@ compute sign
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0
	beq	Ldiv_zero
	mov	V, divisor
	movs	R, dividend
	rsbmi	R, R, #0	@ make dividend nonnegative
',
`	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	V, divisor
	mov	R, dividend
	beq	Ldiv_zero
')

	cmp	R, V			@ if divisor exceeds dividend, done
	mov	Q, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	T, `#'(1 << (WORDSIZE - TOPBITS - 1))
	cmp	R, T
	mov	ITER, #0
	bcc	Lnot_really_big

	@ `Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.'
		mov	SC, #1
	1:
		cmp	V, T
		bcs	3f
		mov	V, V, lsl `#'N
		add	ITER, ITER, #1
		b	1b

	@ Now compute SC.
	2:	adds	V, V, V
		add	SC, SC, #1
		bcc	Lnot_too_big

		@ We get here if the divisor overflowed while shifting.
		@ This means that R has the high-order bit set.
		@ Restore V and subtract from R.
		mov	T, T, lsl `#'TOPBITS
		mov	V, V, lsr #1
		add	V, T, V
		sub	SC, SC, #1
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	V, R
		bcc	2b
	@	beq	Ldo_single_div

	/-* NB: these are commented out in the V8-Sparc manual as well *-/
	/-* (I do not understand this) *-/
	@ V > R: went too far: back up 1 step
	@	srl	V, 1, V
	@	dec	SC
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that R >= V, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if R >= 0.  Because both R and V may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	SC, SC, #1
		blt	Lend_regular_divide
		sub	R, R, V
		mov	Q, #1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	R, #0
		mov	Q, Q, lsl #1
		mov	V, V, lsr #1
		@ R >= 0
		subpl	R, R, V
		addpl	Q, Q, #1
		@ R < 0
		addmi	R, R, V
		submi	Q, Q, #1
	Lend_single_divloop:
		subs	SC, SC, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

1:
	add	ITER, ITER, #1
Lnot_really_big:
	mov	V, V, lsl `#'N
	cmp	V, R
	bls	1b
	@
	@	HOW CAN ITER EVER BE -1 HERE ?????
	@
	cmn	ITER, #1
	beq	Lgot_result

Ldivloop:
	cmp	R, #0	@ set up for initial iteration
	mov	Q, Q, lsl `#'N
	DEVELOP_QUOTIENT_BITS(1, 0)

Lend_regular_divide:
	subs	ITER, ITER, #1
	bge	Ldivloop
	cmp	R, #0
	@ non-restoring fixup here (one instruction only!)
ifelse(OP, `div',
`	sublt	Q, Q, #1
', `	addlt	R, divisor, R
')

Lgot_result:
ifelse(S, `true',
`	@ check to see if answer should be < 0
	cmp	SIGN, #0
	ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
')
	ifelse(OP, `div', `mov r0, Q', `mov r0, R')
	ret

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ret
*/

#ifdef L_udivsi3

@ ___udivsi3 -- unsigned 32-bit division.
@   In:    r0 = dividend, r1 = divisor
@   Out:   r0 = r0 / r1; if r1 is zero, ___div0 is called and, should it
@          return, the result is 0
@   Work:  r2 = quotient (Q), r3 = remainder (R), ip = ITER,
@          lr = comparand (V), r4 = T, r5 = SC
@   Stack: r4, r5 and the return address are pushed on entry and popped
@          (return address straight into pc) on exit

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___udivsi3
	.align 0
___udivsi3:
	stmdb	sp!, {r4, r5, lr}
	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	lr, r1
	mov	r3, r0
	beq	Ldiv_zero		@ flags still from movs: divisor == 0

	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))
	cmp	r3, r4
	mov	ip, #0
	bcc	Lnot_really_big

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
	mov	r5, #1
1:
	cmp	lr, r4
	bcs	3f
	mov	lr, lr, lsl #4
	add	ip, ip, #1
	b	1b

	@ Now compute SC (r5).
2:	adds	lr, lr, lr
	add	r5, r5, #1
	bcc	Lnot_too_big

	@ We get here if the scaled divisor (lr) overflowed while shifting.
	@ This means that r3 has the high-order bit set.
	@ Restore lr and subtract from r3.
	mov	r4, r4, lsl #4
	mov	lr, lr, lsr #1
	add	lr, r4, lr
	sub	r5, r5, #1
	b	Ldo_single_div

Lnot_too_big:
3:	cmp	lr, r3
	bcc	2b
@	beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
Ldo_single_div:
	subs	r5, r5, #1
	blt	Lend_regular_divide
	sub	r3, r3, lr
	mov	r2, #1
	b	Lend_single_divloop
Lsingle_divloop:
	cmp	r3, #0
	mov	r2, r2, lsl #1
	mov	lr, lr, lsr #1
	@ r3 >= 0
	subpl	r3, r3, lr
	addpl	r2, r2, #1
	@ r3 < 0
	addmi	r3, r3, lr
	submi	r2, r2, #1
Lend_single_divloop:
	subs	r5, r5, #1
	bge	Lsingle_divloop
	b	Lend_regular_divide

1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
	@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #15
	b	9f
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #13
	b	9f
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #11
	b	9f
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #9
	b	9f
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #7
	b	9f
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #5
	b	9f
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #3
	b	9f
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #1
	b	9f
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #1
	b	9f
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #3
	b	9f
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #5
	b	9f
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #7
	b	9f
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #9
	b	9f
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #11
	b	9f
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #13
	b	9f
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #15
	b	9f
9:

Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	sublt	r2, r2, #1

Lgot_result:
	mov	r0, r2
	ldmia	sp!, {r4, r5, pc}

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ldmia	sp!, {r4, r5, pc}

#endif /* L_udivsi3 */

#ifdef L_divsi3

@ ___divsi3 -- signed 32-bit division.
@   In:    r0 = dividend, r1 = divisor
@   Out:   r0 = r0 / r1; if r1 is zero, ___div0 is called and, should it
@          return, the result is 0
@   Work:  r2 = quotient (Q), r3 = |dividend| / remainder (R), ip = ITER,
@          lr = comparand (V), r4 = T, r5 = SC, r6 = SIGN;
@          r1 is overwritten with |divisor|
@   Stack: r4, r5, r6 and the return address are pushed on entry and
@          popped (return address straight into pc) on exit
@ The magnitudes are divided with the unsigned algorithm and the quotient
@ is negated at the end when the operand signs differ.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___divsi3
	.align 0
___divsi3:
	stmdb	sp!, {r4, r5, r6, lr}
	@ compute sign of result; if neither is negative, no problem
	eor	r6, r1, r0	@ compute sign
	cmp	r1, #0
	rsbmi	r1, r1, #0
	beq	Ldiv_zero		@ flags still from cmp: divisor == 0
	mov	lr, r1
	movs	r3, r0
	rsbmi	r3, r3, #0	@ make dividend nonnegative

	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))
	cmp	r3, r4
	mov	ip, #0
	bcc	Lnot_really_big

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
	mov	r5, #1
1:
	cmp	lr, r4
	bcs	3f
	mov	lr, lr, lsl #4
	add	ip, ip, #1
	b	1b

	@ Now compute SC (r5).
2:	adds	lr, lr, lr
	add	r5, r5, #1
	bcc	Lnot_too_big

	@ We get here if the scaled divisor (lr) overflowed while shifting.
	@ This means that r3 has the high-order bit set.
	@ Restore lr and subtract from r3.
	mov	r4, r4, lsl #4
	mov	lr, lr, lsr #1
	add	lr, r4, lr
	sub	r5, r5, #1
	b	Ldo_single_div

Lnot_too_big:
3:	cmp	lr, r3
	bcc	2b
@	beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
Ldo_single_div:
	subs	r5, r5, #1
	blt	Lend_regular_divide
	sub	r3, r3, lr
	mov	r2, #1
	b	Lend_single_divloop
Lsingle_divloop:
	cmp	r3, #0
	mov	r2, r2, lsl #1
	mov	lr, lr, lsr #1
	@ r3 >= 0
	subpl	r3, r3, lr
	addpl	r2, r2, #1
	@ r3 < 0
	addmi	r3, r3, lr
	submi	r2, r2, #1
Lend_single_divloop:
	subs	r5, r5, #1
	bge	Lsingle_divloop
	b	Lend_regular_divide

1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
	@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #15
	b	9f
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #13
	b	9f
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #11
	b	9f
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #9
	b	9f
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #7
	b	9f
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #5
	b	9f
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #3
	b	9f
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #1
	b	9f
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #1
	b	9f
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #3
	b	9f
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #5
	b	9f
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #7
	b	9f
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #9
	b	9f
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #11
	b	9f
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #13
	b	9f
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #15
	b	9f
9:

Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	sublt	r2, r2, #1

Lgot_result:
	@ check to see if answer should be < 0
	cmp	r6, #0
	rsbmi	r2, r2, #0
	mov	r0, r2
	ldmia	sp!, {r4, r5, r6, pc}

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ldmia	sp!, {r4, r5, r6, pc}

#endif /* L_divsi3 */

#ifdef L_umodsi3

@ ___umodsi3 -- unsigned 32-bit remainder.
@   In:    r0 = dividend, r1 = divisor
@   Out:   r0 = r0 % r1; if r1 is zero, ___div0 is called and, should it
@          return, the result is 0
@   Work:  r2 = quotient (Q), r3 = remainder (R), ip = ITER,
@          lr = comparand (V), r4 = T, r5 = SC
@   Stack: r4, r5 and the return address are pushed on entry and popped
@          (return address straight into pc) on exit

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___umodsi3
	.align 0
___umodsi3:
	stmdb	sp!, {r4, r5, lr}
	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	lr, r1
	mov	r3, r0
	beq	Ldiv_zero		@ flags still from movs: divisor == 0

	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))
	cmp	r3, r4
	mov	ip, #0
	bcc	Lnot_really_big

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
	mov	r5, #1
1:
	cmp	lr, r4
	bcs	3f
	mov	lr, lr, lsl #4
	add	ip, ip, #1
	b	1b

	@ Now compute SC (r5).
2:	adds	lr, lr, lr
	add	r5, r5, #1
	bcc	Lnot_too_big

	@ We get here if the scaled divisor (lr) overflowed while shifting.
	@ This means that r3 has the high-order bit set.
	@ Restore lr and subtract from r3.
	mov	r4, r4, lsl #4
	mov	lr, lr, lsr #1
	add	lr, r4, lr
	sub	r5, r5, #1
	b	Ldo_single_div

Lnot_too_big:
3:	cmp	lr, r3
	bcc	2b
@	beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
Ldo_single_div:
	subs	r5, r5, #1
	blt	Lend_regular_divide
	sub	r3, r3, lr
	mov	r2, #1
	b	Lend_single_divloop
Lsingle_divloop:
	cmp	r3, #0
	mov	r2, r2, lsl #1
	mov	lr, lr, lsr #1
	@ r3 >= 0
	subpl	r3, r3, lr
	addpl	r2, r2, #1
	@ r3 < 0
	addmi	r3, r3, lr
	submi	r2, r2, #1
Lend_single_divloop:
	subs	r5, r5, #1
	bge	Lsingle_divloop
	b	Lend_regular_divide

1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
	@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #15
	b	9f
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #13
	b	9f
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #11
	b	9f
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #9
	b	9f
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #7
	b	9f
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #5
	b	9f
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #3
	b	9f
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #1
	b	9f
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #1
	b	9f
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #3
	b	9f
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #5
	b	9f
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #7
	b	9f
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #9
	b	9f
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #11
	b	9f
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #13
	b	9f
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #15
	b	9f
9:

Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	addlt	r3, r1, r3

Lgot_result:
	mov	r0, r3
	ldmia	sp!, {r4, r5, pc}

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ldmia	sp!, {r4, r5, pc}

#endif /* L_umodsi3 */

#ifdef L_modsi3

@ ___modsi3 -- signed 32-bit remainder.
@   In:    r0 = dividend, r1 = divisor
@   Out:   r0 = r0 % r1; if r1 is zero, ___div0 is called and, should it
@          return, the result is 0
@   Work:  r2 = quotient (Q), r3 = |dividend| / remainder (R), ip = ITER,
@          lr = comparand (V), r4 = T, r5 = SC, r6 = SIGN;
@          r1 is overwritten with |divisor|
@   Stack: r4, r5, r6 and the return address are pushed on entry and
@          popped (return address straight into pc) on exit
@ The magnitudes are reduced with the unsigned algorithm and the result is
@ negated at the end when the dividend was negative, so that
@ (a / b) * b + a % b == a holds together with ___divsi3, which truncates
@ toward zero:  7 % -2 == 1,  -7 % 2 == -1,  -7 % -2 == -1.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___modsi3
	.align 0
___modsi3:
	stmdb	sp!, {r4, r5, r6, lr}
	@ The remainder takes the sign of the dividend alone.  (Using
	@ divisor XOR dividend, as for a quotient, would give the wrong
	@ sign whenever the divisor is negative.)
	mov	r6, r0		@ compute sign
	cmp	r1, #0
	rsbmi	r1, r1, #0
	beq	Ldiv_zero		@ flags still from cmp: divisor == 0
	mov	lr, r1
	movs	r3, r0
	rsbmi	r3, r3, #0	@ make dividend nonnegative

	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))
	cmp	r3, r4
	mov	ip, #0
	bcc	Lnot_really_big

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
	mov	r5, #1
1:
	cmp	lr, r4
	bcs	3f
	mov	lr, lr, lsl #4
	add	ip, ip, #1
	b	1b

	@ Now compute SC (r5).
2:	adds	lr, lr, lr
	add	r5, r5, #1
	bcc	Lnot_too_big

	@ We get here if the scaled divisor (lr) overflowed while shifting.
	@ This means that r3 has the high-order bit set.
	@ Restore lr and subtract from r3.
	mov	r4, r4, lsl #4
	mov	lr, lr, lsr #1
	add	lr, r4, lr
	sub	r5, r5, #1
	b	Ldo_single_div

Lnot_too_big:
3:	cmp	lr, r3
	bcc	2b
@	beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
Ldo_single_div:
	subs	r5, r5, #1
	blt	Lend_regular_divide
	sub	r3, r3, lr
	mov	r2, #1
	b	Lend_single_divloop
Lsingle_divloop:
	cmp	r3, #0
	mov	r2, r2, lsl #1
	mov	lr, lr, lsr #1
	@ r3 >= 0
	subpl	r3, r3, lr
	addpl	r2, r2, #1
	@ r3 < 0
	addmi	r3, r3, lr
	submi	r2, r2, #1
Lend_single_divloop:
	subs	r5, r5, #1
	bge	Lsingle_divloop
	b	Lend_regular_divide

1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
	@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #15
	b	9f
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #13
	b	9f
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #11
	b	9f
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #9
	b	9f
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #7
	b	9f
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #5
	b	9f
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
	add	r2, r2, #3
	b	9f
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
	add	r2, r2, #1
	b	9f
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #1
	b	9f
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #3
	b	9f
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #5
	b	9f
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #7
	b	9f
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
	@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #9
	b	9f
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #11
	b	9f
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
	@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
	sub	r2, r2, #13
	b	9f
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
	sub	r2, r2, #15
	b	9f
9:

Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	addlt	r3, r1, r3		@ r1 holds |divisor| at this point

Lgot_result:
	@ check to see if answer should be < 0 (r6 = original dividend)
	cmp	r6, #0
	rsbmi	r3, r3, #0
	mov	r0, r3
	ldmia	sp!, {r4, r5, r6, pc}

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ldmia	sp!, {r4, r5, r6, pc}

#endif /* L_modsi3 */

#ifdef L_divmodsi_tools

@ ___div0 -- divide-by-zero hook called by the four routines above.
@ This default version simply returns to its caller, which then
@ yields a result of 0.

	.globl ___div0
	.align 0
___div0:
	mov	pc, lr

#endif /* L_divmodsi_tools */